{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Combining Pandas Objects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "pd.set_option('max_columns', 7,'display.expand_frame_repr', True, # 'max_rows', 10, \n",
    "    'max_colwidth', 9, 'max_rows', 10, #'precision', 2\n",
    ")#, 'width', 45)\n",
    "pd.set_option('display.width', 65)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Introduction"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Appending new rows to DataFrames"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How to do it..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Cornelia</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Abbas</td>\n",
       "      <td>69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Penelope</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Niko</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Name  Age\n",
       "0  Cornelia   70\n",
       "1     Abbas   69\n",
       "2  Penelope    4\n",
       "3      Niko    2"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "names = pd.read_csv('data/names.csv')\n",
    "names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Cornelia</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Abbas</td>\n",
       "      <td>69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Penelope</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Niko</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Aria</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Name  Age\n",
       "0  Cornelia   70\n",
       "1     Abbas   69\n",
       "2  Penelope    4\n",
       "3      Niko    2\n",
       "4      Aria    1"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_data_list = ['Aria', 1]\n",
    "names.loc[4] = new_data_list\n",
    "names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Cornelia</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Abbas</td>\n",
       "      <td>69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Penelope</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Niko</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Aria</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>five</th>\n",
       "      <td>Zach</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Name  Age\n",
       "0     Cornelia   70\n",
       "1        Abbas   69\n",
       "2     Penelope    4\n",
       "3         Niko    2\n",
       "4         Aria    1\n",
       "five      Zach    3"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "names.loc['five'] = ['Zach', 3]\n",
    "names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Cornelia</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Abbas</td>\n",
       "      <td>69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Penelope</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Niko</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Aria</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>five</th>\n",
       "      <td>Zach</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Zayd</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Name  Age\n",
       "0     Cornelia   70\n",
       "1        Abbas   69\n",
       "2     Penelope    4\n",
       "3         Niko    2\n",
       "4         Aria    1\n",
       "five      Zach    3\n",
       "6         Zayd    2"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "names.loc[len(names)] = {'Name':'Zayd', 'Age':2}\n",
    "names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Cornelia</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Abbas</td>\n",
       "      <td>69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Penelope</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Niko</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Aria</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>five</th>\n",
       "      <td>Zach</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Zayd</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Dean</td>\n",
       "      <td>32</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Name  Age\n",
       "0     Cornelia   70\n",
       "1        Abbas   69\n",
       "2     Penelope    4\n",
       "3         Niko    2\n",
       "4         Aria    1\n",
       "five      Zach    3\n",
       "6         Zayd    2\n",
       "7         Dean   32"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "names.loc[len(names)] = pd.Series({'Age':32, 'Name':'Dean'})\n",
    "names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "Can only append a Series if ignore_index=True or if the Series has a name",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-7-ff8da9249302>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mnames\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data/names.csv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mnames\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'Name'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m'Aria'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Age'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m~/.env/364/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mappend\u001b[0;34m(self, other, ignore_index, verify_integrity, sort)\u001b[0m\n\u001b[1;32m   7096\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mignore_index\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   7097\u001b[0m                 raise TypeError(\n\u001b[0;32m-> 7098\u001b[0;31m                     \u001b[0;34m\"Can only append a Series if ignore_index=True\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   7099\u001b[0m                     \u001b[0;34m\" or if the Series has a name\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   7100\u001b[0m                 )\n",
      "\u001b[0;31mTypeError\u001b[0m: Can only append a Series if ignore_index=True or if the Series has a name"
     ]
    }
   ],
   "source": [
    "names = pd.read_csv('data/names.csv')\n",
    "names.append({'Name':'Aria', 'Age':1})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Cornelia</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Abbas</td>\n",
       "      <td>69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Penelope</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Niko</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Aria</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Name  Age\n",
       "0  Cornelia   70\n",
       "1     Abbas   69\n",
       "2  Penelope    4\n",
       "3      Niko    2\n",
       "4      Aria    1"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "names.append({'Name':'Aria', 'Age':1}, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Canada</th>\n",
       "      <td>Cornelia</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Canada</th>\n",
       "      <td>Abbas</td>\n",
       "      <td>69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>USA</th>\n",
       "      <td>Penelope</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>USA</th>\n",
       "      <td>Niko</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Name  Age\n",
       "Canada  Cornelia   70\n",
       "Canada     Abbas   69\n",
       "USA     Penelope    4\n",
       "USA         Niko    2"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "names.index = ['Canada', 'Canada', 'USA', 'USA']\n",
    "names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Name    Zach\n",
       "Age        3\n",
       "Name: 4, dtype: object"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s = pd.Series({'Name': 'Zach', 'Age': 3}, name=len(names))\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Canada</th>\n",
       "      <td>Cornelia</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Canada</th>\n",
       "      <td>Abbas</td>\n",
       "      <td>69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>USA</th>\n",
       "      <td>Penelope</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>USA</th>\n",
       "      <td>Niko</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Zach</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Name  Age\n",
       "Canada  Cornelia   70\n",
       "Canada     Abbas   69\n",
       "USA     Penelope    4\n",
       "USA         Niko    2\n",
       "4           Zach    3"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "names.append(s)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Canada</th>\n",
       "      <td>Cornelia</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Canada</th>\n",
       "      <td>Abbas</td>\n",
       "      <td>69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>USA</th>\n",
       "      <td>Penelope</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>USA</th>\n",
       "      <td>Niko</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Zach</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>USA</th>\n",
       "      <td>Zayd</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Name  Age\n",
       "Canada  Cornelia   70\n",
       "Canada     Abbas   69\n",
       "USA     Penelope    4\n",
       "USA         Niko    2\n",
       "4           Zach    3\n",
       "USA         Zayd    2"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s1 = pd.Series({'Name': 'Zach', 'Age': 3}, name=len(names))\n",
    "s2 = pd.Series({'Name': 'Zayd', 'Age': 2}, name='USA')\n",
    "names.append([s1, s2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>playerID</th>\n",
       "      <th>yearID</th>\n",
       "      <th>stint</th>\n",
       "      <th>...</th>\n",
       "      <th>SH</th>\n",
       "      <th>SF</th>\n",
       "      <th>GIDP</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>altuv...</td>\n",
       "      <td>2016</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>15.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>bregm...</td>\n",
       "      <td>2016</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>castr...</td>\n",
       "      <td>2016</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>corre...</td>\n",
       "      <td>2016</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>12.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>gatti...</td>\n",
       "      <td>2016</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>12.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>reedaj01</td>\n",
       "      <td>2016</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>sprin...</td>\n",
       "      <td>2016</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>12.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>tucke...</td>\n",
       "      <td>2016</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>valbu...</td>\n",
       "      <td>2016</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>white...</td>\n",
       "      <td>2016</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>16 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    playerID  yearID  stint  ...   SH   SF  GIDP\n",
       "0   altuv...    2016      1  ...  3.0  7.0  15.0\n",
       "1   bregm...    2016      1  ...  0.0  1.0   1.0\n",
       "2   castr...    2016      1  ...  1.0  0.0   9.0\n",
       "3   corre...    2016      1  ...  0.0  3.0  12.0\n",
       "4   gatti...    2016      1  ...  0.0  5.0  12.0\n",
       "..       ...     ...    ...  ...  ...  ...   ...\n",
       "11  reedaj01    2016      1  ...  0.0  1.0   1.0\n",
       "12  sprin...    2016      1  ...  0.0  1.0  12.0\n",
       "13  tucke...    2016      1  ...  0.0  0.0   2.0\n",
       "14  valbu...    2016      1  ...  3.0  2.0   5.0\n",
       "15  white...    2016      1  ...  0.0  2.0   6.0\n",
       "\n",
       "[16 rows x 22 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bball_16 = pd.read_csv('data/baseball16.csv')\n",
    "bball_16"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'playerID': 'altuvjo01',\n",
       " 'yearID': 2016,\n",
       " 'stint': 1,\n",
       " 'teamID': 'HOU',\n",
       " 'lgID': 'AL',\n",
       " 'G': 161,\n",
       " 'AB': 640,\n",
       " 'R': 108,\n",
       " 'H': 216,\n",
       " '2B': 42,\n",
       " '3B': 5,\n",
       " 'HR': 24,\n",
       " 'RBI': 96.0,\n",
       " 'SB': 30.0,\n",
       " 'CS': 10.0,\n",
       " 'BB': 60,\n",
       " 'SO': 70.0,\n",
       " 'IBB': 11.0,\n",
       " 'HBP': 7.0,\n",
       " 'SH': 3.0,\n",
       " 'SF': 7.0,\n",
       " 'GIDP': 15.0}"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_dict = bball_16.iloc[0].to_dict()\n",
    "data_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'playerID': '',\n",
       " 'yearID': nan,\n",
       " 'stint': nan,\n",
       " 'teamID': '',\n",
       " 'lgID': '',\n",
       " 'G': nan,\n",
       " 'AB': nan,\n",
       " 'R': nan,\n",
       " 'H': nan,\n",
       " '2B': nan,\n",
       " '3B': nan,\n",
       " 'HR': nan,\n",
       " 'RBI': nan,\n",
       " 'SB': nan,\n",
       " 'CS': nan,\n",
       " 'BB': nan,\n",
       " 'SO': nan,\n",
       " 'IBB': nan,\n",
       " 'HBP': nan,\n",
       " 'SH': nan,\n",
       " 'SF': nan,\n",
       " 'GIDP': nan}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_data_dict = {k: '' if isinstance(v, str) else\n",
    "    np.nan for k, v in data_dict.items()}\n",
    "new_data_dict"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How it works..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### There's more..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "playerID    a\n",
       "yearID      9\n",
       "stint       0\n",
       "teamID      d\n",
       "lgID        a\n",
       "           ..\n",
       "IBB         4\n",
       "HBP         8\n",
       "SH          2\n",
       "SF          8\n",
       "GIDP        5\n",
       "Name: 16, Length: 22, dtype: object"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "random_data = []\n",
    "for i in range(1000):   # doctest: +SKIP\n",
    "    d = dict()\n",
    "    for k, v in data_dict.items():\n",
    "        if isinstance(v, str):\n",
    "            d[k] = np.random.choice(list('abcde'))\n",
    "        else:\n",
    "            d[k] = np.random.randint(10)\n",
    "    random_data.append(pd.Series(d, name=i + len(bball_16)))\n",
    "random_data[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concatenating multiple DataFrames together"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How to do it..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "stocks_2016 = pd.read_csv('data/stocks_2016.csv',\n",
    "    index_col='Symbol')\n",
    "stocks_2017 = pd.read_csv('data/stocks_2017.csv',\n",
    "    index_col='Symbol')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Shares</th>\n",
       "      <th>Low</th>\n",
       "      <th>High</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Symbol</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAPL</th>\n",
       "      <td>80</td>\n",
       "      <td>95</td>\n",
       "      <td>110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>50</td>\n",
       "      <td>80</td>\n",
       "      <td>130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WMT</th>\n",
       "      <td>40</td>\n",
       "      <td>55</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Shares  Low  High\n",
       "Symbol                   \n",
       "AAPL        80   95   110\n",
       "TSLA        50   80   130\n",
       "WMT         40   55    70"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stocks_2016"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Shares</th>\n",
       "      <th>Low</th>\n",
       "      <th>High</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Symbol</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAPL</th>\n",
       "      <td>50</td>\n",
       "      <td>120</td>\n",
       "      <td>140</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GE</th>\n",
       "      <td>100</td>\n",
       "      <td>30</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IBM</th>\n",
       "      <td>87</td>\n",
       "      <td>75</td>\n",
       "      <td>95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SLB</th>\n",
       "      <td>20</td>\n",
       "      <td>55</td>\n",
       "      <td>85</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TXN</th>\n",
       "      <td>500</td>\n",
       "      <td>15</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Shares  Low  High\n",
       "Symbol                   \n",
       "AAPL        50  120   140\n",
       "GE         100   30    40\n",
       "IBM         87   75    95\n",
       "SLB         20   55    85\n",
       "TXN        500   15    23\n",
       "TSLA       100  100   300"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stocks_2017"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Shares</th>\n",
       "      <th>Low</th>\n",
       "      <th>High</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Symbol</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAPL</th>\n",
       "      <td>80</td>\n",
       "      <td>95</td>\n",
       "      <td>110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>50</td>\n",
       "      <td>80</td>\n",
       "      <td>130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WMT</th>\n",
       "      <td>40</td>\n",
       "      <td>55</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAPL</th>\n",
       "      <td>50</td>\n",
       "      <td>120</td>\n",
       "      <td>140</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GE</th>\n",
       "      <td>100</td>\n",
       "      <td>30</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IBM</th>\n",
       "      <td>87</td>\n",
       "      <td>75</td>\n",
       "      <td>95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SLB</th>\n",
       "      <td>20</td>\n",
       "      <td>55</td>\n",
       "      <td>85</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TXN</th>\n",
       "      <td>500</td>\n",
       "      <td>15</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Shares  Low  High\n",
       "Symbol                   \n",
       "AAPL        80   95   110\n",
       "TSLA        50   80   130\n",
       "WMT         40   55    70\n",
       "AAPL        50  120   140\n",
       "GE         100   30    40\n",
       "IBM         87   75    95\n",
       "SLB         20   55    85\n",
       "TXN        500   15    23\n",
       "TSLA       100  100   300"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s_list = [stocks_2016, stocks_2017]\n",
    "pd.concat(s_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>Shares</th>\n",
       "      <th>Low</th>\n",
       "      <th>High</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Year</th>\n",
       "      <th>Symbol</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">2016</th>\n",
       "      <th>AAPL</th>\n",
       "      <td>80</td>\n",
       "      <td>95</td>\n",
       "      <td>110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>50</td>\n",
       "      <td>80</td>\n",
       "      <td>130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WMT</th>\n",
       "      <td>40</td>\n",
       "      <td>55</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"6\" valign=\"top\">2017</th>\n",
       "      <th>AAPL</th>\n",
       "      <td>50</td>\n",
       "      <td>120</td>\n",
       "      <td>140</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GE</th>\n",
       "      <td>100</td>\n",
       "      <td>30</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IBM</th>\n",
       "      <td>87</td>\n",
       "      <td>75</td>\n",
       "      <td>95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SLB</th>\n",
       "      <td>20</td>\n",
       "      <td>55</td>\n",
       "      <td>85</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TXN</th>\n",
       "      <td>500</td>\n",
       "      <td>15</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Shares  Low  High\n",
       "Year Symbol                   \n",
       "2016 AAPL        80   95   110\n",
       "     TSLA        50   80   130\n",
       "     WMT         40   55    70\n",
       "2017 AAPL        50  120   140\n",
       "     GE         100   30    40\n",
       "     IBM         87   75    95\n",
       "     SLB         20   55    85\n",
       "     TXN        500   15    23\n",
       "     TSLA       100  100   300"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat(s_list, keys=['2016', '2017'],\n",
    "   names=['Year', 'Symbol'])  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/matt/.env/364/lib/python3.6/site-packages/ipykernel_launcher.py:2: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
      "of pandas will change to not sort by default.\n",
      "\n",
      "To accept the future behavior, pass 'sort=False'.\n",
      "\n",
      "To retain the current behavior and silence the warning, pass 'sort=True'.\n",
      "\n",
      "  \n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>Year</th>\n",
       "      <th colspan=\"3\" halign=\"left\">2016</th>\n",
       "      <th colspan=\"3\" halign=\"left\">2017</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>Shares</th>\n",
       "      <th>Low</th>\n",
       "      <th>High</th>\n",
       "      <th>Shares</th>\n",
       "      <th>Low</th>\n",
       "      <th>High</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAPL</th>\n",
       "      <td>80.0</td>\n",
       "      <td>95.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>120.0</td>\n",
       "      <td>140.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GE</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IBM</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>87.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>95.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SLB</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20.0</td>\n",
       "      <td>55.0</td>\n",
       "      <td>85.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>50.0</td>\n",
       "      <td>80.0</td>\n",
       "      <td>130.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>300.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TXN</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>500.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WMT</th>\n",
       "      <td>40.0</td>\n",
       "      <td>55.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Year   2016                2017              \n",
       "     Shares   Low   High Shares    Low   High\n",
       "AAPL   80.0  95.0  110.0   50.0  120.0  140.0\n",
       "GE      NaN   NaN    NaN  100.0   30.0   40.0\n",
       "IBM     NaN   NaN    NaN   87.0   75.0   95.0\n",
       "SLB     NaN   NaN    NaN   20.0   55.0   85.0\n",
       "TSLA   50.0  80.0  130.0  100.0  100.0  300.0\n",
       "TXN     NaN   NaN    NaN  500.0   15.0   23.0\n",
       "WMT    40.0  55.0   70.0    NaN    NaN    NaN"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat(s_list, keys=['2016', '2017'],\n",
    "    axis='columns', names=['Year', None])    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>Year</th>\n",
       "      <th colspan=\"3\" halign=\"left\">2016</th>\n",
       "      <th colspan=\"3\" halign=\"left\">2017</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>Shares</th>\n",
       "      <th>Low</th>\n",
       "      <th>High</th>\n",
       "      <th>Shares</th>\n",
       "      <th>Low</th>\n",
       "      <th>High</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Symbol</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAPL</th>\n",
       "      <td>80</td>\n",
       "      <td>95</td>\n",
       "      <td>110</td>\n",
       "      <td>50</td>\n",
       "      <td>120</td>\n",
       "      <td>140</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>50</td>\n",
       "      <td>80</td>\n",
       "      <td>130</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Year     2016            2017          \n",
       "       Shares Low High Shares  Low High\n",
       "Symbol                                 \n",
       "AAPL       80  95  110     50  120  140\n",
       "TSLA       50  80  130    100  100  300"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat(s_list, join='inner', keys=['2016', '2017'],\n",
    "    axis='columns', names=['Year', None])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How it works..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### There's more..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Shares</th>\n",
       "      <th>Low</th>\n",
       "      <th>High</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Symbol</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAPL</th>\n",
       "      <td>80</td>\n",
       "      <td>95</td>\n",
       "      <td>110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>50</td>\n",
       "      <td>80</td>\n",
       "      <td>130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WMT</th>\n",
       "      <td>40</td>\n",
       "      <td>55</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AAPL</th>\n",
       "      <td>50</td>\n",
       "      <td>120</td>\n",
       "      <td>140</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GE</th>\n",
       "      <td>100</td>\n",
       "      <td>30</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IBM</th>\n",
       "      <td>87</td>\n",
       "      <td>75</td>\n",
       "      <td>95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SLB</th>\n",
       "      <td>20</td>\n",
       "      <td>55</td>\n",
       "      <td>85</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TXN</th>\n",
       "      <td>500</td>\n",
       "      <td>15</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Shares  Low  High\n",
       "Symbol                   \n",
       "AAPL        80   95   110\n",
       "TSLA        50   80   130\n",
       "WMT         40   55    70\n",
       "AAPL        50  120   140\n",
       "GE         100   30    40\n",
       "IBM         87   75    95\n",
       "SLB         20   55    85\n",
       "TXN        500   15    23\n",
       "TSLA       100  100   300"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stocks_2016.append(stocks_2017)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Understanding the differences between concat, join, and merge"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How to do it..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Shares</th>\n",
       "      <th>Low</th>\n",
       "      <th>High</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Symbol</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAPL</th>\n",
       "      <td>80</td>\n",
       "      <td>95</td>\n",
       "      <td>110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>50</td>\n",
       "      <td>80</td>\n",
       "      <td>130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WMT</th>\n",
       "      <td>40</td>\n",
       "      <td>55</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Shares  Low  High\n",
       "Symbol                   \n",
       "AAPL        80   95   110\n",
       "TSLA        50   80   130\n",
       "WMT         40   55    70"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from IPython.display import display_html\n",
    "years = 2016, 2017, 2018\n",
    "stock_tables = [pd.read_csv(\n",
    "    'data/stocks_{}.csv'.format(year), index_col='Symbol')\n",
    "    for year in years]\n",
    "stocks_2016, stocks_2017, stocks_2018 = stock_tables\n",
    "stocks_2016"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Shares</th>\n",
       "      <th>Low</th>\n",
       "      <th>High</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Symbol</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAPL</th>\n",
       "      <td>50</td>\n",
       "      <td>120</td>\n",
       "      <td>140</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GE</th>\n",
       "      <td>100</td>\n",
       "      <td>30</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IBM</th>\n",
       "      <td>87</td>\n",
       "      <td>75</td>\n",
       "      <td>95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SLB</th>\n",
       "      <td>20</td>\n",
       "      <td>55</td>\n",
       "      <td>85</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TXN</th>\n",
       "      <td>500</td>\n",
       "      <td>15</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Shares  Low  High\n",
       "Symbol                   \n",
       "AAPL        50  120   140\n",
       "GE         100   30    40\n",
       "IBM         87   75    95\n",
       "SLB         20   55    85\n",
       "TXN        500   15    23\n",
       "TSLA       100  100   300"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stocks_2017"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Shares</th>\n",
       "      <th>Low</th>\n",
       "      <th>High</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Symbol</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAPL</th>\n",
       "      <td>40</td>\n",
       "      <td>135</td>\n",
       "      <td>170</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AMZN</th>\n",
       "      <td>8</td>\n",
       "      <td>900</td>\n",
       "      <td>1125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>50</td>\n",
       "      <td>220</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Shares  Low  High\n",
       "Symbol                   \n",
       "AAPL        40  135   170\n",
       "AMZN         8  900  1125\n",
       "TSLA        50  220   400"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stocks_2018"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>Shares</th>\n",
       "      <th>Low</th>\n",
       "      <th>High</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>Symbol</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">2016</th>\n",
       "      <th>AAPL</th>\n",
       "      <td>80</td>\n",
       "      <td>95</td>\n",
       "      <td>110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>50</td>\n",
       "      <td>80</td>\n",
       "      <td>130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WMT</th>\n",
       "      <td>40</td>\n",
       "      <td>55</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">2017</th>\n",
       "      <th>AAPL</th>\n",
       "      <td>50</td>\n",
       "      <td>120</td>\n",
       "      <td>140</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GE</th>\n",
       "      <td>100</td>\n",
       "      <td>30</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TXN</th>\n",
       "      <td>500</td>\n",
       "      <td>15</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">2018</th>\n",
       "      <th>AAPL</th>\n",
       "      <td>40</td>\n",
       "      <td>135</td>\n",
       "      <td>170</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AMZN</th>\n",
       "      <td>8</td>\n",
       "      <td>900</td>\n",
       "      <td>1125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>50</td>\n",
       "      <td>220</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>12 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             Shares  Low  High\n",
       "     Symbol                   \n",
       "2016 AAPL        80   95   110\n",
       "     TSLA        50   80   130\n",
       "     WMT         40   55    70\n",
       "2017 AAPL        50  120   140\n",
       "     GE         100   30    40\n",
       "...             ...  ...   ...\n",
       "     TXN        500   15    23\n",
       "     TSLA       100  100   300\n",
       "2018 AAPL        40  135   170\n",
       "     AMZN         8  900  1125\n",
       "     TSLA        50  220   400\n",
       "\n",
       "[12 rows x 3 columns]"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat(stock_tables, keys=[2016, 2017, 2018])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/matt/.env/364/lib/python3.6/site-packages/ipykernel_launcher.py:1: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
      "of pandas will change to not sort by default.\n",
      "\n",
      "To accept the future behavior, pass 'sort=False'.\n",
      "\n",
      "To retain the current behavior and silence the warning, pass 'sort=True'.\n",
      "\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"3\" halign=\"left\">2016</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"3\" halign=\"left\">2018</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>Shares</th>\n",
       "      <th>Low</th>\n",
       "      <th>High</th>\n",
       "      <th>...</th>\n",
       "      <th>Shares</th>\n",
       "      <th>Low</th>\n",
       "      <th>High</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAPL</th>\n",
       "      <td>80.0</td>\n",
       "      <td>95.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>...</td>\n",
       "      <td>40.0</td>\n",
       "      <td>135.0</td>\n",
       "      <td>170.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AMZN</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>8.0</td>\n",
       "      <td>900.0</td>\n",
       "      <td>1125.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GE</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IBM</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SLB</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>50.0</td>\n",
       "      <td>80.0</td>\n",
       "      <td>130.0</td>\n",
       "      <td>...</td>\n",
       "      <td>50.0</td>\n",
       "      <td>220.0</td>\n",
       "      <td>400.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TXN</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WMT</th>\n",
       "      <td>40.0</td>\n",
       "      <td>55.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       2016               ...   2018               \n",
       "     Shares   Low   High  ... Shares    Low    High\n",
       "AAPL   80.0  95.0  110.0  ...   40.0  135.0   170.0\n",
       "AMZN    NaN   NaN    NaN  ...    8.0  900.0  1125.0\n",
       "GE      NaN   NaN    NaN  ...    NaN    NaN     NaN\n",
       "IBM     NaN   NaN    NaN  ...    NaN    NaN     NaN\n",
       "SLB     NaN   NaN    NaN  ...    NaN    NaN     NaN\n",
       "TSLA   50.0  80.0  130.0  ...   50.0  220.0   400.0\n",
       "TXN     NaN   NaN    NaN  ...    NaN    NaN     NaN\n",
       "WMT    40.0  55.0   70.0  ...    NaN    NaN     NaN\n",
       "\n",
       "[8 rows x 9 columns]"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat(dict(zip(years, stock_tables)), axis='columns')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Shares_2016</th>\n",
       "      <th>Low_2016</th>\n",
       "      <th>High_2016</th>\n",
       "      <th>Shares_2017</th>\n",
       "      <th>Low_2017</th>\n",
       "      <th>High_2017</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Symbol</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAPL</th>\n",
       "      <td>80.0</td>\n",
       "      <td>95.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>120.0</td>\n",
       "      <td>140.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GE</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IBM</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>87.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>95.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SLB</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20.0</td>\n",
       "      <td>55.0</td>\n",
       "      <td>85.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>50.0</td>\n",
       "      <td>80.0</td>\n",
       "      <td>130.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>300.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TXN</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>500.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>23.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WMT</th>\n",
       "      <td>40.0</td>\n",
       "      <td>55.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Shares_2016  Low_2016  High_2016  Shares_2017  \\\n",
       "Symbol                                                  \n",
       "AAPL        80.0         95.0     110.0       50.0      \n",
       "GE           NaN          NaN       NaN      100.0      \n",
       "IBM          NaN          NaN       NaN       87.0      \n",
       "SLB          NaN          NaN       NaN       20.0      \n",
       "TSLA        50.0         80.0     130.0      100.0      \n",
       "TXN          NaN          NaN       NaN      500.0      \n",
       "WMT         40.0         55.0      70.0        NaN      \n",
       "\n",
       "        Low_2017  High_2017  \n",
       "Symbol                       \n",
       "AAPL       120.0     140.0   \n",
       "GE          30.0      40.0   \n",
       "IBM         75.0      95.0   \n",
       "SLB         55.0      85.0   \n",
       "TSLA       100.0     300.0   \n",
       "TXN         15.0      23.0   \n",
       "WMT          NaN       NaN   "
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stocks_2016.join(stocks_2017, lsuffix='_2016',\n",
    "    rsuffix='_2017', how='outer')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Shares_2016</th>\n",
       "      <th>Low_2016</th>\n",
       "      <th>High_2016</th>\n",
       "      <th>...</th>\n",
       "      <th>Shares_2018</th>\n",
       "      <th>Low_2018</th>\n",
       "      <th>High_2018</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAPL</th>\n",
       "      <td>80.0</td>\n",
       "      <td>95.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>...</td>\n",
       "      <td>40.0</td>\n",
       "      <td>135.0</td>\n",
       "      <td>170.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>50.0</td>\n",
       "      <td>80.0</td>\n",
       "      <td>130.0</td>\n",
       "      <td>...</td>\n",
       "      <td>50.0</td>\n",
       "      <td>220.0</td>\n",
       "      <td>400.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WMT</th>\n",
       "      <td>40.0</td>\n",
       "      <td>55.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GE</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IBM</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SLB</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TXN</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AMZN</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>8.0</td>\n",
       "      <td>900.0</td>\n",
       "      <td>1125.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      Shares_2016  Low_2016  High_2016  ...  Shares_2018  \\\n",
       "AAPL      80.0         95.0     110.0   ...      40.0      \n",
       "TSLA      50.0         80.0     130.0   ...      50.0      \n",
       "WMT       40.0         55.0      70.0   ...       NaN      \n",
       "GE         NaN          NaN       NaN   ...       NaN      \n",
       "IBM        NaN          NaN       NaN   ...       NaN      \n",
       "SLB        NaN          NaN       NaN   ...       NaN      \n",
       "TXN        NaN          NaN       NaN   ...       NaN      \n",
       "AMZN       NaN          NaN       NaN   ...       8.0      \n",
       "\n",
       "      Low_2018  High_2018  \n",
       "AAPL     135.0     170.0   \n",
       "TSLA     220.0     400.0   \n",
       "WMT        NaN       NaN   \n",
       "GE         NaN       NaN   \n",
       "IBM        NaN       NaN   \n",
       "SLB        NaN       NaN   \n",
       "TXN        NaN       NaN   \n",
       "AMZN     900.0    1125.0   \n",
       "\n",
       "[8 rows x 9 columns]"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "other = [stocks_2017.add_suffix('_2017'),\n",
    "    stocks_2018.add_suffix('_2018')]\n",
    "stocks_2016.add_suffix('_2016').join(other, how='outer')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/matt/.env/364/lib/python3.6/site-packages/ipykernel_launcher.py:4: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
      "of pandas will change to not sort by default.\n",
      "\n",
      "To accept the future behavior, pass 'sort=False'.\n",
      "\n",
      "To retain the current behavior and silence the warning, pass 'sort=True'.\n",
      "\n",
      "  after removing the cwd from sys.path.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stock_join = stocks_2016.add_suffix('_2016').join(other,\n",
    "    how='outer')\n",
    "stock_concat = pd.concat(dict(zip(years,stock_tables)),\n",
    "    axis='columns')\n",
    "level_1 = stock_concat.columns.get_level_values(1)\n",
    "level_0 = stock_concat.columns.get_level_values(0).astype(str)\n",
    "stock_concat.columns = level_1 + '_' + level_0\n",
    "stock_join.equals(stock_concat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Shares_x</th>\n",
       "      <th>Low_x</th>\n",
       "      <th>High_x</th>\n",
       "      <th>Shares_y</th>\n",
       "      <th>Low_y</th>\n",
       "      <th>High_y</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Symbol</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAPL</th>\n",
       "      <td>80</td>\n",
       "      <td>95</td>\n",
       "      <td>110</td>\n",
       "      <td>50</td>\n",
       "      <td>120</td>\n",
       "      <td>140</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TSLA</th>\n",
       "      <td>50</td>\n",
       "      <td>80</td>\n",
       "      <td>130</td>\n",
       "      <td>100</td>\n",
       "      <td>100</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Shares_x  Low_x  High_x  Shares_y  Low_y  High_y\n",
       "Symbol                                                  \n",
       "AAPL          80     95     110        50    120     140\n",
       "TSLA          50     80     130       100    100     300"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stocks_2016.merge(stocks_2017, left_index=True,\n",
    "    right_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "step1 = stocks_2016.merge(stocks_2017, left_index=True,\n",
    "    right_index=True, how='outer',\n",
    "    suffixes=('_2016', '_2017'))\n",
    "stock_merge = step1.merge(stocks_2018.add_suffix('_2018'),\n",
    "    left_index=True, right_index=True,\n",
    "    how='outer')\n",
    "stock_concat.equals(stock_merge)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>item</th>\n",
       "      <th>store</th>\n",
       "      <th>price</th>\n",
       "      <th>Date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>pear</td>\n",
       "      <td>A</td>\n",
       "      <td>0.99</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>pear</td>\n",
       "      <td>B</td>\n",
       "      <td>1.99</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>peach</td>\n",
       "      <td>A</td>\n",
       "      <td>2.99</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>peach</td>\n",
       "      <td>B</td>\n",
       "      <td>3.49</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>banana</td>\n",
       "      <td>A</td>\n",
       "      <td>0.39</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>banana</td>\n",
       "      <td>B</td>\n",
       "      <td>0.49</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>steak</td>\n",
       "      <td>A</td>\n",
       "      <td>5.99</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>steak</td>\n",
       "      <td>B</td>\n",
       "      <td>6.99</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>steak</td>\n",
       "      <td>B</td>\n",
       "      <td>4.99</td>\n",
       "      <td>2015</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     item store  price  Date\n",
       "0    pear     A   0.99  2017\n",
       "1    pear     B   1.99  2017\n",
       "2   peach     A   2.99  2017\n",
       "3   peach     B   3.49  2017\n",
       "4  banana     A   0.39  2017\n",
       "5  banana     B   0.49  2017\n",
       "6   steak     A   5.99  2017\n",
       "7   steak     B   6.99  2017\n",
       "8   steak     B   4.99  2015"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "names = ['prices', 'transactions']\n",
    "food_tables = [pd.read_csv('data/food_{}.csv'.format(name))\n",
    "    for name in names]\n",
    "food_prices, food_transactions = food_tables\n",
    "food_prices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>custid</th>\n",
       "      <th>item</th>\n",
       "      <th>store</th>\n",
       "      <th>quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>pear</td>\n",
       "      <td>A</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>banana</td>\n",
       "      <td>A</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>steak</td>\n",
       "      <td>B</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>pear</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>peach</td>\n",
       "      <td>B</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2</td>\n",
       "      <td>steak</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2</td>\n",
       "      <td>coconut</td>\n",
       "      <td>B</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   custid     item store  quantity\n",
       "0       1     pear     A         5\n",
       "1       1   banana     A        10\n",
       "2       2    steak     B         3\n",
       "3       2     pear     B         1\n",
       "4       2    peach     B         2\n",
       "5       2    steak     B         1\n",
       "6       2  coconut     B         4"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "food_transactions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>custid</th>\n",
       "      <th>item</th>\n",
       "      <th>store</th>\n",
       "      <th>quantity</th>\n",
       "      <th>price</th>\n",
       "      <th>Date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>pear</td>\n",
       "      <td>A</td>\n",
       "      <td>5</td>\n",
       "      <td>0.99</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>banana</td>\n",
       "      <td>A</td>\n",
       "      <td>10</td>\n",
       "      <td>0.39</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>steak</td>\n",
       "      <td>B</td>\n",
       "      <td>3</td>\n",
       "      <td>6.99</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>steak</td>\n",
       "      <td>B</td>\n",
       "      <td>3</td>\n",
       "      <td>4.99</td>\n",
       "      <td>2015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>steak</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>6.99</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2</td>\n",
       "      <td>steak</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>4.99</td>\n",
       "      <td>2015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2</td>\n",
       "      <td>pear</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>1.99</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2</td>\n",
       "      <td>peach</td>\n",
       "      <td>B</td>\n",
       "      <td>2</td>\n",
       "      <td>3.49</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   custid    item store  quantity  price  Date\n",
       "0       1    pear     A         5   0.99  2017\n",
       "1       1  banana     A        10   0.39  2017\n",
       "2       2   steak     B         3   6.99  2017\n",
       "3       2   steak     B         3   4.99  2015\n",
       "4       2   steak     B         1   6.99  2017\n",
       "5       2   steak     B         1   4.99  2015\n",
       "6       2    pear     B         1   1.99  2017\n",
       "7       2   peach     B         2   3.49  2017"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "food_transactions.merge(food_prices, on=['item', 'store'])    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>custid</th>\n",
       "      <th>item</th>\n",
       "      <th>store</th>\n",
       "      <th>quantity</th>\n",
       "      <th>price</th>\n",
       "      <th>Date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>pear</td>\n",
       "      <td>A</td>\n",
       "      <td>5</td>\n",
       "      <td>0.99</td>\n",
       "      <td>2017.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>banana</td>\n",
       "      <td>A</td>\n",
       "      <td>10</td>\n",
       "      <td>0.39</td>\n",
       "      <td>2017.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>steak</td>\n",
       "      <td>B</td>\n",
       "      <td>3</td>\n",
       "      <td>6.99</td>\n",
       "      <td>2017.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>pear</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>1.99</td>\n",
       "      <td>2017.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>peach</td>\n",
       "      <td>B</td>\n",
       "      <td>2</td>\n",
       "      <td>3.49</td>\n",
       "      <td>2017.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2</td>\n",
       "      <td>steak</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>6.99</td>\n",
       "      <td>2017.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2</td>\n",
       "      <td>coconut</td>\n",
       "      <td>B</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   custid     item store  quantity  price    Date\n",
       "0       1     pear     A         5   0.99  2017.0\n",
       "1       1   banana     A        10   0.39  2017.0\n",
       "2       2    steak     B         3   6.99  2017.0\n",
       "3       2     pear     B         1   1.99  2017.0\n",
       "4       2    peach     B         2   3.49  2017.0\n",
       "5       2    steak     B         1   6.99  2017.0\n",
       "6       2  coconut     B         4    NaN     NaN"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "food_transactions.merge(food_prices.query('Date == 2017'),\n",
    "    how='left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>price</th>\n",
       "      <th>Date</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>item</th>\n",
       "      <th>store</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">pear</th>\n",
       "      <th>A</th>\n",
       "      <td>0.99</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>1.99</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">peach</th>\n",
       "      <th>A</th>\n",
       "      <td>2.99</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>3.49</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">banana</th>\n",
       "      <th>A</th>\n",
       "      <td>0.39</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>0.49</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">steak</th>\n",
       "      <th>A</th>\n",
       "      <td>5.99</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B</th>\n",
       "      <td>6.99</td>\n",
       "      <td>2017</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              price  Date\n",
       "item   store             \n",
       "pear   A       0.99  2017\n",
       "       B       1.99  2017\n",
       "peach  A       2.99  2017\n",
       "       B       3.49  2017\n",
       "banana A       0.39  2017\n",
       "       B       0.49  2017\n",
       "steak  A       5.99  2017\n",
       "       B       6.99  2017"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "food_prices_join = food_prices.query('Date == 2017') \\\n",
    "   .set_index(['item', 'store'])\n",
    "food_prices_join    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>custid</th>\n",
       "      <th>item</th>\n",
       "      <th>store</th>\n",
       "      <th>quantity</th>\n",
       "      <th>price</th>\n",
       "      <th>Date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>pear</td>\n",
       "      <td>A</td>\n",
       "      <td>5</td>\n",
       "      <td>0.99</td>\n",
       "      <td>2017.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>banana</td>\n",
       "      <td>A</td>\n",
       "      <td>10</td>\n",
       "      <td>0.39</td>\n",
       "      <td>2017.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>steak</td>\n",
       "      <td>B</td>\n",
       "      <td>3</td>\n",
       "      <td>6.99</td>\n",
       "      <td>2017.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>pear</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>1.99</td>\n",
       "      <td>2017.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>peach</td>\n",
       "      <td>B</td>\n",
       "      <td>2</td>\n",
       "      <td>3.49</td>\n",
       "      <td>2017.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2</td>\n",
       "      <td>steak</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>6.99</td>\n",
       "      <td>2017.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2</td>\n",
       "      <td>coconut</td>\n",
       "      <td>B</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   custid     item store  quantity  price    Date\n",
       "0       1     pear     A         5   0.99  2017.0\n",
       "1       1   banana     A        10   0.39  2017.0\n",
       "2       2    steak     B         3   6.99  2017.0\n",
       "3       2     pear     B         1   1.99  2017.0\n",
       "4       2    peach     B         2   3.49  2017.0\n",
       "5       2    steak     B         1   6.99  2017.0\n",
       "6       2  coconut     B         4    NaN     NaN"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "food_transactions.join(food_prices_join, on=['item', 'store'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "cannot handle a non-unique multi-index!",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-41-2148ae82d9cf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m pd.concat([food_transactions.set_index(['item', 'store']),\n\u001b[1;32m      2\u001b[0m            food_prices.set_index(['item', 'store'])],\n\u001b[0;32m----> 3\u001b[0;31m           axis='columns')\n\u001b[0m",
      "\u001b[0;32m~/.env/364/lib/python3.6/site-packages/pandas/core/reshape/concat.py\u001b[0m in \u001b[0;36mconcat\u001b[0;34m(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, sort, copy)\u001b[0m\n\u001b[1;32m    256\u001b[0m     )\n\u001b[1;32m    257\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 258\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    259\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    260\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.env/364/lib/python3.6/site-packages/pandas/core/reshape/concat.py\u001b[0m in \u001b[0;36mget_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    466\u001b[0m                     \u001b[0mobj_labels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmgr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maxes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0max\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    467\u001b[0m                     \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mnew_labels\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mequals\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj_labels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 468\u001b[0;31m                         \u001b[0mindexers\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0max\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mobj_labels\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_labels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    469\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    470\u001b[0m                 \u001b[0mmgrs_indexers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexers\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.env/364/lib/python3.6/site-packages/pandas/core/indexes/multi.py\u001b[0m in \u001b[0;36mreindex\u001b[0;34m(self, target, method, level, limit, tolerance)\u001b[0m\n\u001b[1;32m   2498\u001b[0m                     )\n\u001b[1;32m   2499\u001b[0m                 \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2500\u001b[0;31m                     \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cannot handle a non-unique multi-index!\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2501\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2502\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mMultiIndex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mValueError\u001b[0m: cannot handle a non-unique multi-index!"
     ]
    }
   ],
   "source": [
    "pd.concat([food_transactions.set_index(['item', 'store']),\n",
    "           food_prices.set_index(['item', 'store'])],\n",
    "          axis='columns')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How it works..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### There's more..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Midgrade</th>\n",
       "      <th>Premium</th>\n",
       "      <th>Diesel</th>\n",
       "      <th>All Grades</th>\n",
       "      <th>Regular</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Week</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2017-09-25</th>\n",
       "      <td>2.859</td>\n",
       "      <td>3.105</td>\n",
       "      <td>2.788</td>\n",
       "      <td>2.701</td>\n",
       "      <td>2.583</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2017-09-18</th>\n",
       "      <td>2.906</td>\n",
       "      <td>3.151</td>\n",
       "      <td>2.791</td>\n",
       "      <td>2.750</td>\n",
       "      <td>2.634</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2017-09-11</th>\n",
       "      <td>2.953</td>\n",
       "      <td>3.197</td>\n",
       "      <td>2.802</td>\n",
       "      <td>2.800</td>\n",
       "      <td>2.685</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2017-09-04</th>\n",
       "      <td>2.946</td>\n",
       "      <td>3.191</td>\n",
       "      <td>2.758</td>\n",
       "      <td>2.794</td>\n",
       "      <td>2.679</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2017-08-28</th>\n",
       "      <td>2.668</td>\n",
       "      <td>2.901</td>\n",
       "      <td>2.605</td>\n",
       "      <td>2.513</td>\n",
       "      <td>2.399</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2007-01-29</th>\n",
       "      <td>2.277</td>\n",
       "      <td>2.381</td>\n",
       "      <td>2.413</td>\n",
       "      <td>2.213</td>\n",
       "      <td>2.165</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2007-01-22</th>\n",
       "      <td>2.285</td>\n",
       "      <td>2.391</td>\n",
       "      <td>2.430</td>\n",
       "      <td>2.216</td>\n",
       "      <td>2.165</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2007-01-15</th>\n",
       "      <td>2.347</td>\n",
       "      <td>2.453</td>\n",
       "      <td>2.463</td>\n",
       "      <td>2.280</td>\n",
       "      <td>2.229</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2007-01-08</th>\n",
       "      <td>2.418</td>\n",
       "      <td>2.523</td>\n",
       "      <td>2.537</td>\n",
       "      <td>2.354</td>\n",
       "      <td>2.306</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2007-01-01</th>\n",
       "      <td>2.442</td>\n",
       "      <td>2.547</td>\n",
       "      <td>2.580</td>\n",
       "      <td>2.382</td>\n",
       "      <td>2.334</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>561 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           Midgrade  Premium  Diesel  All Grades  Regular\n",
       "Week                                                     \n",
       "2017-0...     2.859    3.105   2.788     2.701      2.583\n",
       "2017-0...     2.906    3.151   2.791     2.750      2.634\n",
       "2017-0...     2.953    3.197   2.802     2.800      2.685\n",
       "2017-0...     2.946    3.191   2.758     2.794      2.679\n",
       "2017-0...     2.668    2.901   2.605     2.513      2.399\n",
       "...             ...      ...     ...       ...        ...\n",
       "2007-0...     2.277    2.381   2.413     2.213      2.165\n",
       "2007-0...     2.285    2.391   2.430     2.216      2.165\n",
       "2007-0...     2.347    2.453   2.463     2.280      2.229\n",
       "2007-0...     2.418    2.523   2.537     2.354      2.306\n",
       "2007-0...     2.442    2.547   2.580     2.382      2.334\n",
       "\n",
       "[561 rows x 5 columns]"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import glob\n",
    "df_list = []\n",
    "for filename in glob.glob('data/gas prices/*.csv'):\n",
    "    df_list.append(pd.read_csv(filename, index_col='Week',\n",
    "    parse_dates=['Week']))\n",
    "gas = pd.concat(df_list, axis='columns')\n",
    "gas"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Connecting to SQL databases"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How to do it..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "from sqlalchemy import create_engine\n",
    "engine = create_engine('sqlite:///data/chinook.db')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>TrackId</th>\n",
       "      <th>Name</th>\n",
       "      <th>AlbumId</th>\n",
       "      <th>...</th>\n",
       "      <th>Milliseconds</th>\n",
       "      <th>Bytes</th>\n",
       "      <th>UnitPrice</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>For T...</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>343719</td>\n",
       "      <td>11170334</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Balls...</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>342562</td>\n",
       "      <td>5510424</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Fast ...</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>230619</td>\n",
       "      <td>3990994</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Restl...</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>252051</td>\n",
       "      <td>4331779</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Princ...</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>375418</td>\n",
       "      <td>6290521</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3498</th>\n",
       "      <td>3499</td>\n",
       "      <td>Pini ...</td>\n",
       "      <td>343</td>\n",
       "      <td>...</td>\n",
       "      <td>286741</td>\n",
       "      <td>4718950</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3499</th>\n",
       "      <td>3500</td>\n",
       "      <td>Strin...</td>\n",
       "      <td>344</td>\n",
       "      <td>...</td>\n",
       "      <td>139200</td>\n",
       "      <td>2283131</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3500</th>\n",
       "      <td>3501</td>\n",
       "      <td>L'orf...</td>\n",
       "      <td>345</td>\n",
       "      <td>...</td>\n",
       "      <td>66639</td>\n",
       "      <td>1189062</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3501</th>\n",
       "      <td>3502</td>\n",
       "      <td>Quint...</td>\n",
       "      <td>346</td>\n",
       "      <td>...</td>\n",
       "      <td>221331</td>\n",
       "      <td>3665114</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3502</th>\n",
       "      <td>3503</td>\n",
       "      <td>Koyaa...</td>\n",
       "      <td>347</td>\n",
       "      <td>...</td>\n",
       "      <td>206005</td>\n",
       "      <td>3305164</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3503 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      TrackId      Name  AlbumId  ...  Milliseconds     Bytes  \\\n",
       "0           1  For T...        1  ...    343719      11170334   \n",
       "1           2  Balls...        2  ...    342562       5510424   \n",
       "2           3  Fast ...        3  ...    230619       3990994   \n",
       "3           4  Restl...        3  ...    252051       4331779   \n",
       "4           5  Princ...        3  ...    375418       6290521   \n",
       "...       ...       ...      ...  ...       ...           ...   \n",
       "3498     3499  Pini ...      343  ...    286741       4718950   \n",
       "3499     3500  Strin...      344  ...    139200       2283131   \n",
       "3500     3501  L'orf...      345  ...     66639       1189062   \n",
       "3501     3502  Quint...      346  ...    221331       3665114   \n",
       "3502     3503  Koyaa...      347  ...    206005       3305164   \n",
       "\n",
       "     UnitPrice  \n",
       "0         0.99  \n",
       "1         0.99  \n",
       "2         0.99  \n",
       "3         0.99  \n",
       "4         0.99  \n",
       "...        ...  \n",
       "3498      0.99  \n",
       "3499      0.99  \n",
       "3500      0.99  \n",
       "3501      0.99  \n",
       "3502      0.99  \n",
       "\n",
       "[3503 rows x 9 columns]"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tracks = pd.read_sql_table('tracks', engine)\n",
    "tracks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Milliseconds</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Rock</td>\n",
       "      <td>343719</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Rock</td>\n",
       "      <td>342562</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Rock</td>\n",
       "      <td>230619</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Rock</td>\n",
       "      <td>252051</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Rock</td>\n",
       "      <td>375418</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3498</th>\n",
       "      <td>Class...</td>\n",
       "      <td>286741</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3499</th>\n",
       "      <td>Class...</td>\n",
       "      <td>139200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3500</th>\n",
       "      <td>Class...</td>\n",
       "      <td>66639</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3501</th>\n",
       "      <td>Class...</td>\n",
       "      <td>221331</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3502</th>\n",
       "      <td>Opera</td>\n",
       "      <td>174813</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3503 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          Name  Milliseconds\n",
       "0         Rock    343719    \n",
       "1         Rock    342562    \n",
       "2         Rock    230619    \n",
       "3         Rock    252051    \n",
       "4         Rock    375418    \n",
       "...        ...       ...    \n",
       "3498  Class...    286741    \n",
       "3499  Class...    139200    \n",
       "3500  Class...     66639    \n",
       "3501  Class...    221331    \n",
       "3502     Opera    174813    \n",
       "\n",
       "[3503 rows x 2 columns]"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(pd.read_sql_table('genres', engine)\n",
    "     .merge(tracks[['GenreId', 'Milliseconds']],\n",
    "            on='GenreId', how='left') \n",
    "     .drop('GenreId', axis='columns')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Name\n",
       "Rock And Roll      00:02:14\n",
       "Opera              00:02:54\n",
       "Hip Hop/Rap        00:02:58\n",
       "Easy Listening     00:03:09\n",
       "Bossa Nova         00:03:39\n",
       "                     ...   \n",
       "Comedy             00:26:25\n",
       "TV Shows           00:35:45\n",
       "Drama              00:42:55\n",
       "Science Fiction    00:43:45\n",
       "Sci Fi & Fantasy   00:48:31\n",
       "Name: Milliseconds, Length: 25, dtype: timedelta64[ns]"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(pd.read_sql_table('genres', engine)\n",
    "     .merge(tracks[['GenreId', 'Milliseconds']],\n",
    "            on='GenreId', how='left') \n",
    "     .drop('GenreId', axis='columns')\n",
    "     .groupby('Name')\n",
    "     ['Milliseconds']\n",
    "     .mean()\n",
    "     .pipe(lambda s_: pd.to_timedelta(s_, unit='ms'))\n",
    "     .dt.floor('s')\n",
    "     .sort_values()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CustomerId</th>\n",
       "      <th>FirstName</th>\n",
       "      <th>LastName</th>\n",
       "      <th>InvoiceId</th>\n",
       "      <th>UnitPrice</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Luís</td>\n",
       "      <td>Gonça...</td>\n",
       "      <td>98</td>\n",
       "      <td>1.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>Luís</td>\n",
       "      <td>Gonça...</td>\n",
       "      <td>98</td>\n",
       "      <td>1.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>Luís</td>\n",
       "      <td>Gonça...</td>\n",
       "      <td>121</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>Luís</td>\n",
       "      <td>Gonça...</td>\n",
       "      <td>121</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>Luís</td>\n",
       "      <td>Gonça...</td>\n",
       "      <td>121</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2235</th>\n",
       "      <td>59</td>\n",
       "      <td>Puja</td>\n",
       "      <td>Sriva...</td>\n",
       "      <td>284</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2236</th>\n",
       "      <td>59</td>\n",
       "      <td>Puja</td>\n",
       "      <td>Sriva...</td>\n",
       "      <td>284</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2237</th>\n",
       "      <td>59</td>\n",
       "      <td>Puja</td>\n",
       "      <td>Sriva...</td>\n",
       "      <td>284</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2238</th>\n",
       "      <td>59</td>\n",
       "      <td>Puja</td>\n",
       "      <td>Sriva...</td>\n",
       "      <td>284</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2239</th>\n",
       "      <td>59</td>\n",
       "      <td>Puja</td>\n",
       "      <td>Sriva...</td>\n",
       "      <td>284</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2240 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      CustomerId FirstName  LastName  InvoiceId  UnitPrice  \\\n",
       "0            1        Luís  Gonça...        98       1.99    \n",
       "1            1        Luís  Gonça...        98       1.99    \n",
       "2            1        Luís  Gonça...       121       0.99    \n",
       "3            1        Luís  Gonça...       121       0.99    \n",
       "4            1        Luís  Gonça...       121       0.99    \n",
       "...        ...         ...       ...       ...        ...    \n",
       "2235        59        Puja  Sriva...       284       0.99    \n",
       "2236        59        Puja  Sriva...       284       0.99    \n",
       "2237        59        Puja  Sriva...       284       0.99    \n",
       "2238        59        Puja  Sriva...       284       0.99    \n",
       "2239        59        Puja  Sriva...       284       0.99    \n",
       "\n",
       "      Quantity  \n",
       "0            1  \n",
       "1            1  \n",
       "2            1  \n",
       "3            1  \n",
       "4            1  \n",
       "...        ...  \n",
       "2235         1  \n",
       "2236         1  \n",
       "2237         1  \n",
       "2238         1  \n",
       "2239         1  \n",
       "\n",
       "[2240 rows x 6 columns]"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cust = pd.read_sql_table('customers', engine,\n",
    "    columns=['CustomerId','FirstName',\n",
    "    'LastName'])\n",
    "invoice = pd.read_sql_table('invoices', engine,\n",
    "    columns=['InvoiceId','CustomerId'])\n",
    "ii = pd.read_sql_table('invoice_items', engine,\n",
    "    columns=['InvoiceId', 'UnitPrice', 'Quantity'])\n",
    "(cust\n",
    "    .merge(invoice, on='CustomerId') \n",
    "    .merge(ii, on='InvoiceId')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "CustomerId  FirstName  LastName  \n",
       "6           Helena     Holý          49.62\n",
       "26          Richard    Cunningham    47.62\n",
       "57          Luis       Rojas         46.62\n",
       "46          Hugh       O'Reilly      45.62\n",
       "45          Ladislav   Kovács        45.62\n",
       "                                     ...  \n",
       "32          Aaron      Mitchell      37.62\n",
       "31          Martha     Silk          37.62\n",
       "29          Robert     Brown         37.62\n",
       "27          Patrick    Gray          37.62\n",
       "59          Puja       Srivastava    36.64\n",
       "Name: Total, Length: 59, dtype: float64"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(cust\n",
    "    .merge(invoice, on='CustomerId') \n",
    "    .merge(ii, on='InvoiceId')\n",
    "    .assign(Total=lambda df_:df_.Quantity * df_.UnitPrice)\n",
    "    .groupby(['CustomerId', 'FirstName', 'LastName'])\n",
    "    ['Total']\n",
    "    .sum()\n",
    "    .sort_values(ascending=False) \n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How it works..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### There's more..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>avg_time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Rock ...</td>\n",
       "      <td>00:02:14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Opera</td>\n",
       "      <td>00:02:54</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Hip H...</td>\n",
       "      <td>00:02:58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Easy ...</td>\n",
       "      <td>00:03:09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Bossa...</td>\n",
       "      <td>00:03:39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>Comedy</td>\n",
       "      <td>00:26:25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>TV Shows</td>\n",
       "      <td>00:35:45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>Drama</td>\n",
       "      <td>00:42:55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>Scien...</td>\n",
       "      <td>00:43:45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>Sci F...</td>\n",
       "      <td>00:48:31</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>25 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        Name  avg_time\n",
       "0   Rock ...  00:02:14\n",
       "1      Opera  00:02:54\n",
       "2   Hip H...  00:02:58\n",
       "3   Easy ...  00:03:09\n",
       "4   Bossa...  00:03:39\n",
       "..       ...       ...\n",
       "20    Comedy  00:26:25\n",
       "21  TV Shows  00:35:45\n",
       "22     Drama  00:42:55\n",
       "23  Scien...  00:43:45\n",
       "24  Sci F...  00:48:31\n",
       "\n",
       "[25 rows x 2 columns]"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sql_string1 = '''\n",
    "SELECT\n",
    "    Name,\n",
    "    time(avg(Milliseconds) / 1000, 'unixepoch') as avg_time\n",
    "FROM (\n",
    "      SELECT\n",
    "          g.Name,\n",
    "          t.Milliseconds\n",
    "      FROM\n",
    "          genres as g\n",
    "      JOIN\n",
    "          tracks as t on\n",
    "          g.genreid == t.genreid\n",
    "     )\n",
    "GROUP BY Name\n",
    "ORDER BY avg_time'''\n",
    "pd.read_sql_query(sql_string1, engine)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "sql_string2 = '''\n",
    "   SELECT\n",
    "         c.customerid,\n",
    "         c.FirstName,\n",
    "         c.LastName,\n",
    "         sum(ii.quantity * ii.unitprice) as Total\n",
    "   FROM\n",
    "        customers as c\n",
    "   JOIN\n",
    "        invoices as i\n",
    "        on c.customerid = i.customerid\n",
    "   JOIN\n",
    "       invoice_items as ii\n",
    "       on i.invoiceid = ii.invoiceid\n",
    "   GROUP BY\n",
    "       c.customerid, c.FirstName, c.LastName\n",
    "   ORDER BY\n",
    "       Total desc'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CustomerId</th>\n",
       "      <th>FirstName</th>\n",
       "      <th>LastName</th>\n",
       "      <th>Total</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6</td>\n",
       "      <td>Helena</td>\n",
       "      <td>Holý</td>\n",
       "      <td>49.62</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>26</td>\n",
       "      <td>Richard</td>\n",
       "      <td>Cunni...</td>\n",
       "      <td>47.62</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>57</td>\n",
       "      <td>Luis</td>\n",
       "      <td>Rojas</td>\n",
       "      <td>46.62</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>45</td>\n",
       "      <td>Ladislav</td>\n",
       "      <td>Kovács</td>\n",
       "      <td>45.62</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>46</td>\n",
       "      <td>Hugh</td>\n",
       "      <td>O'Reilly</td>\n",
       "      <td>45.62</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>53</td>\n",
       "      <td>Phil</td>\n",
       "      <td>Hughes</td>\n",
       "      <td>37.62</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>54</td>\n",
       "      <td>Steve</td>\n",
       "      <td>Murray</td>\n",
       "      <td>37.62</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>55</td>\n",
       "      <td>Mark</td>\n",
       "      <td>Taylor</td>\n",
       "      <td>37.62</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>56</td>\n",
       "      <td>Diego</td>\n",
       "      <td>Gutié...</td>\n",
       "      <td>37.62</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td>59</td>\n",
       "      <td>Puja</td>\n",
       "      <td>Sriva...</td>\n",
       "      <td>36.64</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>59 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    CustomerId FirstName  LastName  Total\n",
       "0          6      Helena      Holý  49.62\n",
       "1         26     Richard  Cunni...  47.62\n",
       "2         57        Luis     Rojas  46.62\n",
       "3         45    Ladislav    Kovács  45.62\n",
       "4         46        Hugh  O'Reilly  45.62\n",
       "..       ...         ...       ...    ...\n",
       "54        53        Phil    Hughes  37.62\n",
       "55        54       Steve    Murray  37.62\n",
       "56        55        Mark    Taylor  37.62\n",
       "57        56       Diego  Gutié...  37.62\n",
       "58        59        Puja  Sriva...  36.64\n",
       "\n",
       "[59 rows x 4 columns]"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_sql_query(sql_string2, engine)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "jupytext": {
   "cell_metadata_filter": "-all",
   "main_language": "python",
   "notebook_metadata_filter": "-all"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
